# long - web crawler
# Date: 2021/9/7
# Purpose of this chapter: worked example
import requests
import re

# Page that is downloaded and parsed.
url = "https://movie.douban.com/top250"

# Request headers. A desktop-browser User-Agent is sent with the request
# (presumably so the site serves the regular HTML page -- confirm if changed).
die = {
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}

# Pre-compiled pattern that extracts one movie entry per match:
#   name  - text of the first <span class="title"> inside the item
#   year  - text between the first <br> and the following &nbsp;
#   title - text of <span class="inq"> (the one-line quote), if present
#
# The quote part is optional and the scan towards it is not allowed to run
# past the next '<div class="item">'.  Without that guard an entry that has
# no quote would swallow the following entry and be paired with its quote.
obj = re.compile(r'<div class="item">.*?<span class="title">(?P<name>.*?)'
                 r'</span>.*?<br>(?P<year>.*?)&nbsp;'
                 r'(?:(?:(?!<div class="item">).)*?<span class="inq">'
                 r'(?P<title>.*?)</span>)?', re.S)


def parse_movies(html):
    """Extract movie records from the HTML of a Top 250 page.

    Args:
        html: Page source as a string.

    Returns:
        A list of dicts with the keys ``name``, ``year`` and ``title``, in
        page order.  ``year`` has surrounding whitespace removed; ``title``
        is an empty string when the entry has no quote.
    """
    records = []
    for match in obj.finditer(html):
        # default='' keeps the record shape stable when the quote is absent.
        record = match.groupdict(default='')
        record['year'] = record['year'].strip()
        records.append(record)
    return records


def main():
    """Download the page, parse it and append one record per line to a.txt.

    Raises:
        requests.RequestException: if the request times out or the server
            answers with an error status.
    """
    # A timeout prevents the script from hanging forever on a stalled
    # connection; raise_for_status() stops an error page from being parsed
    # as if it were the movie list.
    red = requests.get(url, headers=die, timeout=10)
    red.raise_for_status()

    # The with-statement closes the file; no explicit close() is needed.
    with open('a.txt', 'a', encoding='utf-8') as f:
        f.writelines(str(record) + "\n" for record in parse_movies(red.text))

    print('ok')


if __name__ == "__main__":
    main()